from configs import vocab_config


def build_from_src(ref_vocab, files):
    """Count whitespace-separated words from source files into ``ref_vocab``.

    Each entry of ``files`` is joined onto ``vocab_config.BASE_DIR`` and the
    resulting file is read as text, line by line. ``ref_vocab`` is updated in
    place; the function returns nothing.

    Args:
        ref_vocab: Mutable mapping of word -> occurrence count. Counts already
            present are kept and incremented.
        files: Iterable of path components appended to
            ``vocab_config.BASE_DIR`` to locate each source file.
    """
    for file_name in files:
        # The context manager closes the handle even if reading fails.
        with open(vocab_config.BASE_DIR / file_name) as src:
            for line in src:
                # split() with no argument never yields empty strings, so no
                # extra filtering is needed.
                for word in line.split():
                    # A first sighting counts as 1; starting new words at 0
                    # would leave every total one short.
                    ref_vocab[word] = ref_vocab.get(word, 0) + 1


if __name__ == '__main__':
    # Write one "<name>.vocab" file per entry in vocab_config.vocabs. Each
    # entry supplies a 'name' (output file stem) and 'files' (sources to count).
    for each_vocab in vocab_config.vocabs:
        name = each_vocab['name']
        vocab = {}
        build_from_src(vocab, each_vocab['files'])
        output = vocab_config.BASE_DIR / vocab_config.output_dir / (name + ".vocab")
        # The context manager flushes and closes the file even if a write
        # raises part-way through; plain "w" suffices because the file is
        # never read back here.
        with open(output, "w") as f:
            # One "word count" pair per line.
            for k, v in vocab.items():
                f.write("{:s} {:d}\n".format(k, v))
